import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
import tensorflow
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.utils import gen_batches
import networkx as nx
import matplotlib.pyplot as plt
import re
import numpy as np
from IPython.display import display
from matplotlib import pyplot as plt
from wordcloud import WordCloud
import seaborn as sns
import json
# Load the recipes data set from a CSV file into a DataFrame
df = pd.read_csv("recipes_data.csv")
# Preview the first five rows
df.head(5)
| title | ingredients | directions | link | source | NER | site | |
|---|---|---|---|---|---|---|---|
| 0 | No-Bake Nut Cookies | ["1 c. firmly packed brown sugar", "1/2 c. eva... | ["In a heavy 2-quart saucepan, mix brown sugar... | www.cookbooks.com/Recipe-Details.aspx?id=44874 | Gathered | ["bite size shredded rice biscuits", "vanilla"... | www.cookbooks.com |
| 1 | Jewell Ball'S Chicken | ["1 small jar chipped beef, cut up", "4 boned ... | ["Place chipped beef on bottom of baking dish.... | www.cookbooks.com/Recipe-Details.aspx?id=699419 | Gathered | ["cream of mushroom soup", "beef", "sour cream... | www.cookbooks.com |
| 2 | Creamy Corn | ["2 (16 oz.) pkg. frozen corn", "1 (8 oz.) pkg... | ["In a slow cooker, combine all ingredients. C... | www.cookbooks.com/Recipe-Details.aspx?id=10570 | Gathered | ["frozen corn", "pepper", "cream cheese", "gar... | www.cookbooks.com |
| 3 | Chicken Funny | ["1 large whole chicken", "2 (10 1/2 oz.) cans... | ["Boil and debone chicken.", "Put bite size pi... | www.cookbooks.com/Recipe-Details.aspx?id=897570 | Gathered | ["chicken gravy", "cream of mushroom soup", "c... | www.cookbooks.com |
| 4 | Reeses Cups(Candy) | ["1 c. peanut butter", "3/4 c. graham cracker ... | ["Combine first four ingredients and press in ... | www.cookbooks.com/Recipe-Details.aspx?id=659239 | Gathered | ["graham cracker crumbs", "powdered sugar", "p... | www.cookbooks.com |
# List the column names of the data set
df.columns.tolist()
['title', 'ingredients', 'directions', 'link', 'source', 'NER', 'site']
All the fields are related to recipes:-
# Parse the JSON-encoded "NER" strings into NumPy arrays
df["NER"] = df["NER"].apply(lambda x: np.array(json.loads(x)))
# Parse the "directions" strings into Python lists of steps.
# json.loads replaces eval here: eval would execute any code embedded in the
# CSV, while the preview shows this column uses the same double-quoted
# list-of-strings format that json.loads already handles for "NER".
df['directions'] = df['directions'].apply(json.loads)
# Number of steps = number of entries in each directions list
df['No_of_Steps_to_make_recipe'] = df['directions'].apply(len)
df.head(5)
| title | ingredients | directions | link | source | NER | site | No_of_Steps_to_make_recipe | |
|---|---|---|---|---|---|---|---|---|
| 0 | No-Bake Nut Cookies | ["1 c. firmly packed brown sugar", "1/2 c. eva... | [In a heavy 2-quart saucepan, mix brown sugar,... | www.cookbooks.com/Recipe-Details.aspx?id=44874 | Gathered | [bite size shredded rice biscuits, vanilla, br... | www.cookbooks.com | 6 |
| 1 | Jewell Ball'S Chicken | ["1 small jar chipped beef, cut up", "4 boned ... | [Place chipped beef on bottom of baking dish.,... | www.cookbooks.com/Recipe-Details.aspx?id=699419 | Gathered | [cream of mushroom soup, beef, sour cream, chi... | www.cookbooks.com | 3 |
| 2 | Creamy Corn | ["2 (16 oz.) pkg. frozen corn", "1 (8 oz.) pkg... | [In a slow cooker, combine all ingredients. Co... | www.cookbooks.com/Recipe-Details.aspx?id=10570 | Gathered | [frozen corn, pepper, cream cheese, garlic pow... | www.cookbooks.com | 1 |
| 3 | Chicken Funny | ["1 large whole chicken", "2 (10 1/2 oz.) cans... | [Boil and debone chicken., Put bite size piece... | www.cookbooks.com/Recipe-Details.aspx?id=897570 | Gathered | [chicken gravy, cream of mushroom soup, chicke... | www.cookbooks.com | 6 |
| 4 | Reeses Cups(Candy) | ["1 c. peanut butter", "3/4 c. graham cracker ... | [Combine first four ingredients and press in 1... | www.cookbooks.com/Recipe-Details.aspx?id=659239 | Gathered | [graham cracker crumbs, powdered sugar, peanut... | www.cookbooks.com | 3 |
# Parse the "ingredients" strings into Python lists.
# json.loads replaces eval here: eval would execute any code embedded in the
# CSV, while the preview shows this column is a double-quoted list of strings.
df['ingredients'] = df['ingredients'].apply(json.loads)
# Number of ingredients = number of entries in each ingredients list
df['No_of_ingredients_in_recipe'] = df['ingredients'].apply(len)
df.head(5)
| title | ingredients | directions | link | source | NER | site | No_of_Steps_to_make_recipe | No_of_ingredients_in_recipe | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | No-Bake Nut Cookies | [1 c. firmly packed brown sugar, 1/2 c. evapor... | [In a heavy 2-quart saucepan, mix brown sugar,... | www.cookbooks.com/Recipe-Details.aspx?id=44874 | Gathered | [bite size shredded rice biscuits, vanilla, br... | www.cookbooks.com | 6 | 6 |
| 1 | Jewell Ball'S Chicken | [1 small jar chipped beef, cut up, 4 boned chi... | [Place chipped beef on bottom of baking dish.,... | www.cookbooks.com/Recipe-Details.aspx?id=699419 | Gathered | [cream of mushroom soup, beef, sour cream, chi... | www.cookbooks.com | 3 | 4 |
| 2 | Creamy Corn | [2 (16 oz.) pkg. frozen corn, 1 (8 oz.) pkg. c... | [In a slow cooker, combine all ingredients. Co... | www.cookbooks.com/Recipe-Details.aspx?id=10570 | Gathered | [frozen corn, pepper, cream cheese, garlic pow... | www.cookbooks.com | 1 | 6 |
| 3 | Chicken Funny | [1 large whole chicken, 2 (10 1/2 oz.) cans ch... | [Boil and debone chicken., Put bite size piece... | www.cookbooks.com/Recipe-Details.aspx?id=897570 | Gathered | [chicken gravy, cream of mushroom soup, chicke... | www.cookbooks.com | 6 | 5 |
| 4 | Reeses Cups(Candy) | [1 c. peanut butter, 3/4 c. graham cracker cru... | [Combine first four ingredients and press in 1... | www.cookbooks.com/Recipe-Details.aspx?id=659239 | Gathered | [graham cracker crumbs, powdered sugar, peanut... | www.cookbooks.com | 3 | 5 |
# Sort recipes by ingredient count (ascending) to inspect both extremes
df.sort_values(by='No_of_ingredients_in_recipe')
| title | ingredients | directions | link | source | NER | site | No_of_Steps_to_make_recipe | No_of_ingredients_in_recipe | |
|---|---|---|---|---|---|---|---|---|---|
| 1335130 | Roasted Garlic & Dill Dressing | [1 bulb of garlic 1cup buttermilk 1/2 cup mayo... | [cut top off garlic, drizzle small amount of o... | www.epicurious.com/recipes/member/views/roaste... | Gathered | [garlic] | www.epicurious.com | 1 | 1 |
| 57305 | Cottage Cheese | [any amount sour milk or fresh clabber] | [Heat sour milk to a temperature of about 98° ... | www.cookbooks.com/Recipe-Details.aspx?id=651009 | Gathered | [sour milk] | www.cookbooks.com | 4 | 1 |
| 2223401 | Peg's Strawberry Tarts Recipe | [1 quart. strawberries] | [CREAM FILLING: Beat all ingredients together ... | cookeatshare.com/recipes/peg-s-strawberry-tart... | Recipes1M | [strawberries] | cookeatshare.com | 6 | 1 |
| 1330070 | Brazilian Black Bean Soup | [ingredients] | [Rinse the beans. Cover them with water, and l... | www.epicurious.com/recipes/member/views/brazil... | Gathered | [ingredients] | www.epicurious.com | 11 | 1 |
| 1933284 | Bubble Gum Ice Tea Recipe | [2 ounce Bubble Gum Schnapps fill ice tea] | [Ice // rocks] | cookeatshare.com/recipes/bubble-gum-ice-tea-94464 | Recipes1M | [fill ice tea] | cookeatshare.com | 1 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1997023 | D.I.Y Spice Blends (No3) | [1/2 cup dry mustard powder, 1/2 cup ground tu... | [Mix the ingredients together well, then trans... | cookpad.com/us/recipes/367445-diy-spice-blends... | Recipes1M | [sugar, brown sugar, anise, ground fennel seed... | cookpad.com | 40 | 99 |
| 719917 | Dumplings And Cabbage | [1 c. flour, 1 egg, milk, dash of salt, 1/2 c.... | [If you do not have large potatoes, use more o... | www.cookbooks.com/Recipe-Details.aspx?id=301825 | Gathered | [bread, sugar, brown sugar, fresh mushrooms, r... | www.cookbooks.com | 5 | 223 |
| 27310 | Strawberry Butter | [1 c. powdered sugar, 1 egg yolk, 1/2 c. butte... | [My daughter made this soup, and I thought it ... | www.cookbooks.com/Recipe-Details.aspx?id=301676 | Gathered | [bisquick, bread, sugar, noodles, liquid smoke... | www.cookbooks.com | 1 | 285 |
| 122527 | Layered Salad | [lettuce, torn into small pieces, carrots, gra... | [This recipe was in the drawer for some years,... | www.cookbooks.com/Recipe-Details.aspx?id=301736 | Gathered | [sugar, chunky chicken, curry powder, accent, ... | www.cookbooks.com | 5 | 338 |
| 719239 | Grandma'S Chicken Fricassee | [1 lb. chopped meat, 1 egg, 1/2 c. seasoned br... | [Paula and I collaborated on this recipe, and ... | www.cookbooks.com/Recipe-Details.aspx?id=301776 | Gathered | [american cheese, sugar, noodles, accent, chil... | www.cookbooks.com | 4 | 407 |
2231142 rows × 9 columns
Some of the recipes shown in the table above are ones for which proper directions are not available.
# Sort recipes by number of steps (ascending) to inspect both extremes
df.sort_values(by='No_of_Steps_to_make_recipe')
| title | ingredients | directions | link | source | NER | site | No_of_Steps_to_make_recipe | No_of_ingredients_in_recipe | |
|---|---|---|---|---|---|---|---|---|---|
| 1294279 | Broccoli Salad | [2 c. chopped fresh broccoli, 1/2 c. unsalted ... | [Combine first ingredients and fold in with dr... | www.epicurious.com/recipes/member/views/brocco... | Gathered | [fresh broccoli, wine vinegar, dressing, ¼, gr... | www.epicurious.com | 1 | 8 |
| 1575108 | Cape Cod Cocktail | [4 ounces, fluid Cranberry Juice, 1 ounce, flu... | [Pour the cranberry juice and vodka into a hig... | tastykitchen.com/recipes/drinks/cape-cod-cockt... | Gathered | [lime, fluid vodka, cranberry juice] | tastykitchen.com | 1 | 3 |
| 1273781 | Grandmother Cora Horton'S Millionaire Fudge | [4 1/2 c. white sugar, 1 tall can evaporated m... | [Chocolate, nuts and marshmallow stuff into a ... | www.epicurious.com/recipes/member/views/grandm... | Gathered | [white sugar, oleo, marshmallow cream, walnuts... | www.epicurious.com | 1 | 6 |
| 1575110 | Vanilla Cinnamon Smoothie | [1/2 cups Whole Milk, 3/4 cups Plain Greek Yog... | [In a blender, add all ingredients and run on ... | tastykitchen.com/recipes/drinks/vanilla-cinnam... | Gathered | [¼, greek yogurt, cinnamon, milk] | tastykitchen.com | 1 | 4 |
| 522016 | Meringue Cookies | [2 large egg whites, 3/4 c. sugar, 1 tsp. vani... | [Beat egg whites until foamy. Gradually add su... | www.cookbooks.com/Recipe-Details.aspx?id=14161 | Gathered | [sugar, vanilla, egg whites] | www.cookbooks.com | 1 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2012938 | Making Egg Dough Pastas | [DRY, 2 cups all-purpose flour, WET, 9 egg yol... | [Measure the flour and shake it through a siev... | www.epicurious.com/recipes/food/views/making-e... | Recipes1M | [extra-virgin olive oil, wet, egg yolk, eggs, ... | www.epicurious.com | 145 | 21 |
| 1702484 | Cannelloni with Asparagus and Ham | [2 pounds asparagus, 6 tablespoons butter, 1 c... | [Trim 1 inch or more off the butt ends of the ... | www.cookstr.com/recipes/cannelloni-with-aspara... | Recipes1M | [nutmeg, eggs, ham, water, flour, asparagus, m... | www.cookstr.com | 148 | 13 |
| 2210363 | Soft Cheesy Pretzel | [1 1/2 cups flour, all-purpose, 2 teaspoons ba... | [Combine first 4 ingredients in a medium bowl;... | recipeland.com/recipe/v/soft-cheesy-pretzel--5... | Recipes1M | [baking powder, sugar, eggs, cheddar cheese, f... | recipeland.com | 151 | 9 |
| 1739457 | Mike's NY Cheesecake 101 | [30 Graham Crackers (crushed for crust), 4 tbs... | [Preheat oven to 350 (175 degrees C)., Triple ... | cookpad.com/us/recipes/352287-mikes-ny-cheesec... | Recipes1M | [mixing bowl, sugar, vanilla, crackers, eggs, ... | cookpad.com | 163 | 19 |
| 1717085 | Moon Cakes | [1 1/4 cups sugar, 1/4 plus 1/8 teaspoon citri... | [In a small saucepan, combine the sugar, citri... | www.epicurious.com/recipes/food/views/moon-cak... | Recipes1M | [caramel sauce, pork fatback, sugar, eggs, siu... | www.epicurious.com | 164 | 27 |
2231142 rows × 9 columns
# Count the null entries in every column of the dataset
missing_values = df.isna().sum()
missing_values
title 0 ingredients 0 directions 0 link 0 source 0 NER 0 site 0 No_of_Steps_to_make_recipe 0 No_of_ingredients_in_recipe 0 dtype: int64
There are no missing values in the dataset, so no missing-value handling is needed. In general, the following methods are used to handle missing values:
# Inspect the dtype pandas assigned to each column
data_types = df.dtypes
data_types
title object ingredients object directions object link object source object NER object site object No_of_Steps_to_make_recipe int64 No_of_ingredients_in_recipe int64 dtype: object
# Report the number of distinct values in each column
for c in df.columns:
    try:
        unique_values = df[c].nunique()
    except TypeError:
        # Columns whose cells are lists/arrays are unhashable, so nunique()
        # cannot count them. Skip only that case instead of hiding every
        # possible error (a bare except also swallows KeyboardInterrupt).
        continue
    print(c, " has ",unique_values, " values ")
title has 1312871 values link has 2231142 values source has 2 values site has 28 values No_of_Steps_to_make_recipe has 127 values No_of_ingredients_in_recipe has 81 values
# Box plot of the per-recipe ingredient count, then its min / mean / max
ingredient_counts = df['No_of_ingredients_in_recipe']
plt.figure(figsize=(16, 6))
sns.boxplot(x=ingredient_counts)
plt.show()
ingredient_counts.min(), ingredient_counts.mean(), ingredient_counts.max()
(1, 8.727042474212757, 407)
From the figure above it is clear that there are outliers in the feature No_of_ingredients_in_recipe. The average number of ingredients is around 9, while the maximum is 407; this very wide range indicates the presence of outliers in the data.
# Select the recipe(s) whose ingredient count equals the column maximum
ingredient_count_col = df['No_of_ingredients_in_recipe']
max_ingredients = df.loc[ingredient_count_col.eq(ingredient_count_col.max())]
max_ingredients
| title | ingredients | directions | link | source | NER | site | No_of_Steps_to_make_recipe | No_of_ingredients_in_recipe | |
|---|---|---|---|---|---|---|---|---|---|
| 719239 | Grandma'S Chicken Fricassee | [1 lb. chopped meat, 1 egg, 1/2 c. seasoned br... | [Paula and I collaborated on this recipe, and ... | www.cookbooks.com/Recipe-Details.aspx?id=301776 | Gathered | [american cheese, sugar, noodles, accent, chil... | www.cookbooks.com | 4 | 407 |
# Show the full, untruncated cell contents of the selected row(s)
max_ingredients.values
array([["Grandma'S Chicken Fricassee",
list(['1 lb. chopped meat', '1 egg', '1/2 c. seasoned bread crumbs', '2 Tbsp. ketchup', '1 tsp. salt', '1/2 tsp. pepper', '6 to 7 potatoes (medium)', '2 1/2 lb. ham end', '1 c. milk', '4 Tbsp. seasoned bread crumbs', '4 Tbsp. butter', '1 lb. ham steak, cubed', '2 c. celery, sliced', '3/4 c. mayonnaise', '1 can chunky chicken soup', '2 Tbsp. lemon juice', '3 eggs, hard-boiled and cut up', '1 (8 oz.) can mushroom stems and pieces', '1 c. walnuts', '1/2 c. butter', '2 c. stuffing mix', '12 oz. corn bread twists', '1 lb. ground ham', '1 can Cheddar cheese soup', '1 c. seasoned bread crumbs', '1 pkg. onion soup mix', '4 Tbsp. butter, divided', '4 Tbsp. flour, divided', '2 tsp. seasoned salt, divided', '2 to 2 1/2 lb. boneless veal', '1 Tbsp. paprika, divided', '1 can chicken broth', '1/2 lb. mushrooms, sliced', '2 medium onions, sliced', '1/4 c. steak sauce', 'hot buttered noodles', '1 roast beef (with or without bones, any size)', '8 oz. ditalini macaroni', '1 (15 oz.) can garbanzo beans', '2 c. cooked chicken, cubed', '8 oz. sour cream', '5 oz. Cheddar cheese soup', '1 tsp. oregano', '1 tsp. garlic powder', '1/2 tsp. seasoned salt', '1/2 tsp. black pepper', '1/2 tsp. soy sauce', '2 to 3 lb. meat loaf mix (1 lb. beef, 1/2 lb. pork and 1/2 lb. veal)', '2 to 3 eggs', '1 c. Italian style bread crumbs', '1/2 c. grated Parmesan cheese', '1 Tbsp. oregano', 'spaghetti sauce', '1 lb. sweet Italian sausage (optional)', '1 1/2 c. instant rice', '1 can chicken broth', '1 lb. shrimp', '1 lb. smoked sausage (link)', '1 c. green pepper, cut in medium pieces', '1 c. celery, sliced diagonally', '1 c. onions, cut fine', '1 (1 lb.) can stewed tomatoes, cut into pieces', '2 cans condensed tomato soup', '6 to 8 drops hot pepper sauce', '1/2 tsp. black pepper', '1 tsp. seasoned salt', '1 tsp. garlic powder', '1 1/2 lb. bulk sweet sausage', '1 (32 oz.) 
jar spaghetti sauce', '1 can condensed cream of tomato soup', '1 green pepper, cut into small pieces', '1 medium onion, diced fine', '6 lasagne noodles', '1 small eggplant', '4 to 6 Tbsp. olive oil', '1 c. grated Parmesan cheese, divided', '1/4 lb. Mozzarella cheese, sliced thin', '1 medium head cabbage', '1 tsp. salt', '1/2 tsp. pepper', '2 lb. meat loaf mix', '2 pkg. onion soup mix', '1/2 c. instant rice', '2 cans condensed tomato soup', '2 cans water', '1 lb. meat loaf mix (beef, pork and veal)', '1/2 medium onion, chopped fine', '1/2 c. seasoned bread crumbs', '1 egg', '1/4 c. Parmesan cheese, grated', '1/4 c. milk', '1/2 tsp. garlic powder', '1/2 tsp. seasoned salt', '1/4 tsp. pepper', '1 Tbsp. olive oil', '1 jar chicken gravy', '1 c. sour cream', '1 Tbsp. paprika', '1 1/2 lb. ground chuck', '1 c. seasoned bread crumbs', '1 egg', '2 green onions, cut small', '1/2 c. spaghetti sauce', '1/4 tsp. black pepper', '1 (16 oz.) cream-style corn', '1 pkg. Knorr onion soup mix', '6 large green peppers or 8 medium', '1 1/4 lb. ground lamb', '2 c. instant rice (uncooked)', '1 can cream of mushroom soup', '1 (2.62 oz.) jar McCormick Salad Supreme seasoning', '1 large jar spaghetti sauce, divided', '1 lb. lean ground beef', '1 onion, chopped', '2 (16 oz.) cans kidney beans with sauce', '1 (16 oz.) jar salsa (mild)', '1 Tbsp. chili powder', '1 1/2 lb. top round steak, sliced thin', '4 oz. sliced mushrooms (fresh or canned)', '2 Tbsp. butter or margarine', '1 pkg. Lipton dry onion soup mix', '1 Tbsp. soy sauce', '2 Tbsp. flour', '1 c. milk', '1 c. water', '1 1/2 c. green peppers, sliced thin', '2 pork cutlets or boned pork chops', 'cooking oil', 'Accent', 'garlic powder', 'black pepper', '1/2 c. green pepper, in strips', '1/2 c. onion, diced', '1/2 c. celery, sliced', '1 c. stewed tomatoes', '1 c. tomato sauce', '1 Tbsp. basil', '1 (2 lb.) pkg. sauerkraut', '2 lb. spareribs', '2 apples', '1 pkg. onion soup mix', '1 Tbsp. caraway seed', 'salt', 'pepper', 'sugar', "3 to 4 lb. 
brisket or top round (If you're not sure, ask the butcher or clerk in the supermarket for a pot roast.)", '1 bottle Heinz chili sauce', '1 pkg. onion soup mix (Knorr or Lipton)', '1 can or bottle beer', '1 qt. sauerkraut, drained', '8 oz. noodles, cooked according to pkg. instructions', '2 cans condensed cream of chicken soup', '1 tsp. dry mustard', '1/2 c. mayonnaise', '1/2 c. chopped onions', '16 oz. corned beef (canned is good)', '8 oz. Swiss cheese, grated', '1/2 c. rye bread crumbs*', '2 cans water', '1 lb. super lean ground beef or ground veal', '1 Tbsp. olive oil', '1 (48 oz.) jar Ragu spaghetti sauce with meat', '1 (8 oz.) can tomato sauce', '1 lb. rigatoni, cooked and drained', '1 Tbsp. oregano', '1 tsp. salt', '1 tsp. garlic powder', '1 tsp. onion powder', '1/4 tsp. black pepper', '12 oz. small curd cottage cheese', '1 lb. Mozzarella cheese, sliced', '2 oz. Parmesan cheese', '4 chicken breast halves', '1 egg', '2 Tbsp. milk', 'salt', 'pepper', 'garlic powder', 'corn flakes', '3 oz. potato pancake mix', '2 eggs', '1 c. water', '1 (15 to 16 oz.) can salmon', '1 green onion, cut into small pieces', '1 Tbsp. parsley', '1 tsp. garlic powder', '1/2 tsp. pepper', 'cooking oil', '1 lb. scallops (fresh are best)', '2 Tbsp. butter', '1 tsp. lemon juice', '1 c. Italian seasoned bread crumbs', '1 c. grated American cheese', '1/2 lb. spinach linguini or fettucini', '1/4 c. butter', '1 tsp. garlic', '1/8 tsp. freshly ground black pepper', '1 c. half and half', '1/2 lb. Sea Stix (salad style)', '1/2 c. Parmesan cheese', '1/2 lb. shrimp', '1/2 lb. scallops', '1/2 lb. crab meat or Sea Stix', '1/2 lb. mushrooms, sliced', '1/4 lb. butter', '1/2 pt. heavy cream', '2 Tbsp. sherry', '1 c. sharp Cheddar cheese', '1 tsp. Worcestershire sauce', '1 tsp. garlic powder', '2 Tbsp. flour', '2 Tbsp. bread crumbs', '12 frozen pastry shells', '1 lb. boneless, skinless chicken breasts', '2 egg yolks', '1 c. ice water', '1 c. flour', '2 Tbsp. sesame oil', '1 Tbsp. soy sauce', '2 Tbsp. 
honey', '2 Tbsp. sweet and sour sauce', '1 Tbsp. sesame seed', '3/4 lb. shrimp', '2 Tbsp. butter', '1 tsp. garlic powder*', '2 Tbsp. seasoned bread crumbs', 'oregano', '8 oz. spaghetti', '2 Tbsp. butter', '1/2 c. grated Parmesan cheese', '2 eggs, well beaten', '12 oz. cottage cheese', '1 1/4 to 1 1/2 lb. meat loaf mix (beef, veal and pork)', '1/2 c. onion, chopped', '1/2 c. green pepper, chopped', '1 (8 oz.) can sliced tomatoes', '1 (8 oz.) can tomato sauce', '1 tsp. oregano', '1/2 tsp. garlic powder', '1/2 c. shredded Mozzarella cheese', '12 lasagna noodles', '2 Tbsp. olive oil', '2 lb. ground beef', '1/2 c. onion, minced', '2 (6 oz.) cans tomato paste', '1 (15 oz.) can tomato sauce', '2 tsp. basil', '2 tsp. parsley', '2 tsp. sugar', '1 tsp. salt', '1 tsp. oregano', '1/2 tsp. garlic salt', '1/4 tsp. pepper', '16 oz. Ricotta', '8 oz. Mozzarella, sliced', '1 tsp. salt', '1/2 tsp. pepper', '1 lb. boneless pork', '1/2 c. soy sauce', '1 tsp. ginger', '16 oz. Japanese style vegetables or 2 pkg. stir-fry Japanese style vegetables', 'chow mein noodles', '6 green peppers', '1 1/2 lb. meat loaf mix (beef, veal and pork)', '4 eggs', '3/4 c. instant rice', '3/4 c. grated cheese', '1 1/2 tsp. black pepper', '1 1/2 tsp. garlic powder', '1 1/2 tsp. oregano', '1 large jar spaghetti sauce', '1 lb. jumbo shells', '1 1/2 lb. ground beef', '1 medium onion, chopped', '1 egg', '1/2 c. seasoned bread crumbs', '1/4 c. grated Parmesan cheese', '1 tsp. salt', '1/2 tsp. pepper', '16 oz. spaghetti sauce', '3 large zucchini', '2 Tbsp. olive oil', '1 1/4 to 1 1/2 lb. meat loaf mix (beef, pork and veal)', '1 large onion', '1 Tbsp. oregano', '1 tsp. garlic powder', '1 (6 oz.) pkg. chicken flavored rice pilaf mix', '1 (32 oz.) jar spaghetti sauce, divided', '1 c. grated Parmesan cheese', '4 Tbsp. cooking oil', '1 Tbsp. garlic powder', '1 1/4 to 1 1/2 lb. 
boneless pork, cut into 1-inch cubes', '1 medium onion, chopped', '1 green pepper, cut into 1-inch pieces', '1/2 red pepper (sweet), cut into 1-inch pieces', '1 (15 oz.) can pineapple chunks in natural syrup', '1/4 c. honey', '1/4 c. soy sauce', '2 Tbsp. cider vinegar', '2 to 3 Tbsp. cornstarch', 'chow mein noodles', '1/2 lb. wide noodles', '1 (12 oz.) can Cheddar cheese soup', '6 oz. milk', '1 (5 oz.) can evaporated milk', '2 Tbsp. butter', '2 (6 oz.) cans solid white tuna', '1/2 sweet red pepper, cut up into small pieces', '1 (8 oz.) can sweet peas, drained', '1 Tbsp. Dijon mustard', '1/2 tsp. black pepper', '1 (3 oz.) can French fried onions', '2 medium onions', '1/4 c. butter', '2 (6 1/2 oz.) cans tuna, drained', '2 c. shredded Cheddar, divided', '1 c. baking mix', '1 1/4 c. milk', '1 tsp. seafood seasoning', '3 eggs', '2 medium tomatoes, thinly sliced', '1 (8 oz.) pkg. frozen green peas in cream sauce', '1 c. milk', '2 c. turkey, cubed', '1 1/2 c. mushrooms, sliced', '1 Tbsp. butter', '1 Tbsp. grated Parmesan cheese', '1/2 tsp. garlic powder', '1/2 tsp. black pepper', '1 pkg. Betty Crocker fettuccine Alfredo Tuna Helper', '1 c. cooked turkey or chicken, cut into bite size pieces', '2 lb. veal boneless shoulder, cubed', '2 Tbsp. olive oil', '2 large green peppers, cut into strips', '1 large red pepper, cut into strips', '4 green onions, cut into small pieces', '1/2 lb. mushrooms, sliced', '1 (32 oz.) jar spaghetti sauce', '2 Tbsp. grated Parmesan cheese', '1 tsp. oregano', '3 large zucchini', '4 Tbsp. olive oil, divided', '1 (32 oz.) jar spaghetti sauce, divided', '1 1/2 lb. meat loaf mix (beef, pork and veal)', '1/2 c. chopped onion', '1 Tbsp. oregano', '1 tsp. garlic powder', '6 oz. chicken flavored rice pilaf mix', '1/4 lb. sliced Mozzarella cheese', '1 c. grated Parmesan cheese', '2 lb. fresh asparagus spears or canned asparagus', '1 (10 oz.) can cream of asparagus soup', '1/2 c. sour cream', '1 can French fried onions', '1 lb. 
frozen cauliflower', '2 (1 lb.) cans asparagus pieces', '1 medium onion, diced fine', '1 can condensed mushroom soup', '4 oz. grated Cheddar cheese', '2 oz. slivered almonds', '1 head broccoli*', '1 c. water', '1/2 tsp. salt', '1 (10 oz.) can broccoli cheese soup', '2 Tbsp. water', '4 oz. sour cream', '1/2 can French fried onions', '1 lb. frozen carrots, sliced', '1/4 lb. butter (1 stick)', '1 (8 oz.) can tomato sauce', '1/4 c. sugar', '1 green onion, diced', '1/4 green pepper, diced', '1 stalk celery, sliced', '1 lb. frozen cauliflower florets', '3 Tbsp. butter', '2 Tbsp. seasoned bread crumbs', '1 Tbsp. sherry', '2 Tbsp. grated cheese', '1 lb. frozen cauliflower florets', '1 can cream of mushroom soup', '4 oz. sour cream', '1/2 c. milk', '1/4 tsp. black pepper', '1/4 tsp. garlic powder', '2 Tbsp. grated Parmesan cheese', '1 (2.8 oz.) can French fried onions, divided', '1 lb. fresh asparagus', '1 Tbsp. sesame seed', '2 Tbsp. butter', '1 tsp. sesame oil', '1 tsp. soy sauce', '4 slices bacon', '2 green onions', '1 (10 oz.) pkg. frozen carrots and peas', '1/2 c. water', '1 1/2 c. shredded lettuce', '1 Tbsp. parsley flakes', '1 tsp. seasoned salt', '1 tsp. garlic powder', '1/2 tsp. pepper', '1/4 c. butter', '1/2 c. onions, chopped', '4 c. cabbage, sliced', '1/2 tsp. salt', '1/4 tsp. pepper', '2 chicken bouillon cubes, crushed']),
list(['Paula and I collaborated on this recipe, and it was fun.', 'This dish is quite different from my usual fare, but my wonderful wife said it is popular in mill towns.', "I'll list it under vegetables, but it could be used as a main dish.", 'Try it, I think you will be quite pleased.']),
'www.cookbooks.com/Recipe-Details.aspx?id=301776', 'Gathered',
array(['american cheese', 'sugar', 'noodles', 'accent', 'chili powder',
'potato pancake mix', 'green onion', 'sweet and', 'baking mix',
'sweet red pepper', 'almonds', 'sweet peas', 'cabbage', 'rigatoni',
'frozen carrots', 'garlic salt', 'water', 'walnuts', 'ground beef',
'mustard', 'fettucini', 'sour cream', 'wide noodles',
'ditalini macaroni', 'cream of mushroom soup', 'lasagna noodles',
'red pepper', 'ground chuck', 'onions', 'eggplant', 'milk',
'apples', 'sherry', 'cottage cheese', 'beef', 'carrots',
'fresh asparagus spears', 'jumbo shells', 'onion soup', 'salmon',
'parsley flakes', 'pork cutlets', 'pepper sauce',
'salad supreme seasoning', 'shrimp', 'garlic',
'freshly ground black pepper', 'pastry shells', 'meat',
'soy sauce', 'mein noodles', 'chicken gravy', 'tuna',
'italian style bread crumbs', 'swiss cheese', 'turkey',
'bread crumbs', 'pineapple', 'chopped meat', 'potatoes',
'kidney beans', 'sweet italian sausage', 'steak sauce',
'shredded lettuce', 'tomato soup', 'basil', 'garlic powder',
'super lean ground beef', 'cheddar', 'chicken bouillon cubes',
'chicken', 'green pepper', 'honey', 'ground lamb', 'spaghetti',
'ground ham', 'oregano', 'corn', 'cooking oil', 'solid white tuna',
'japanese style vegetables', 'corned beef', 'grated cheese',
'spaghetti sauce', 'frozen cauliflower', 'cheddar cheese soup',
'worcestershire sauce', 'stuffing mix', 'mix', 'scallops',
'broccoli cheese soup', 'eggs', 'mozzarella cheese', 'rye bread',
'salad style', 'cornstarch', 'broccoli', 'butter',
'cream of asparagus soup', 'chicken breasts', 'tomato paste',
'mozzarella', 'ginger', 'celery', 'sausage', 'helper',
'lemon juice', 'cheddar cheese', 'onion powder', 'onion',
'chicken soup', 'crab meat', 'flour', 'chicken broth', 'paprika',
'onion soup mix', 'salt', 'black pepper', 'heavy cream', 'brisket',
'spareribs', 'frozen green peas', 'corn bread',
'hot buttered noodles', 'instant rice', 'dry mustard', 'ham steak',
'boneless veal', 'sesame seed', 'egg', 'condensed cream',
'tomatoes', 'bulk sweet sausage', 'garbanzo beans', 'corn flakes',
'fresh asparagus', 'head cabbage', 'cheese', 'cider vinegar',
'ham end', 'tomato sauce', 'zucchini', 'mushroom stems',
'chili sauce', 'parmesan cheese', 'mushrooms', 'salsa',
'sesame oil', 'sauerkraut', 'green onions',
'chicken flavored rice pilaf mix', 'thin', 'ricotta', 'ketchup',
'egg yolks', 'olive oil', 'bacon', 'pepper', 'green peppers',
'caraway seed', 'seafood seasoning', 'lean ground beef',
'mayonnaise', 'parsley', 'italian seasoned bread crumbs',
'condensed mushroom soup', 'boneless pork'], dtype='<U31') ,
'www.cookbooks.com', 4, 407]], dtype=object)
Some recipes, such as the one shown above, do not have proper directions; a description is written in their place.
# Show the full, untruncated cell contents of the first row for comparison
df.head(1).values
array([['No-Bake Nut Cookies',
list(['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla', '1/2 c. broken nuts (pecans)', '2 Tbsp. butter or margarine', '3 1/2 c. bite size shredded rice biscuits']),
list(['In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine.', 'Stir over medium heat until mixture bubbles all over top.', 'Boil and stir 5 minutes more. Take off heat.', 'Stir in vanilla and cereal; mix well.', 'Using 2 teaspoons, drop and shape into 30 clusters on wax paper.', 'Let stand until firm, about 30 minutes.']),
'www.cookbooks.com/Recipe-Details.aspx?id=44874', 'Gathered',
array(['bite size shredded rice biscuits', 'vanilla', 'brown sugar',
'nuts', 'milk', 'butter'], dtype='<U32') ,
'www.cookbooks.com', 6, 6]], dtype=object)
# Box plot of the per-recipe step count, then its min / mean / max
step_counts = df['No_of_Steps_to_make_recipe']
plt.figure(figsize=(16, 6))
sns.boxplot(x=step_counts)
plt.show()
step_counts.min(), step_counts.mean(), step_counts.max()
(1, 6.61362790893632, 164)
The average number of steps to make a recipe is about 7, but the maximum value is 164. This large gap between the average and the maximum, together with the box plot above, shows that there are outliers: a very small portion of recipes take far more steps than most of the recipes.
Textual data such as ingredient names contain unnecessary characters or symbols, which need to be removed before analysis.
def labeled_barplot(data, feature, perc=False, n=None, title=None):
    """
    Count plot of one column with a label on top of every bar

    data: dataframe
    feature: dataframe column
    perc: whether to display percentages instead of count (default is False)
    n: displays the top n category levels (default is None, i.e., display all levels)
    title: text used as the plot title (default is None)
    """
    total = len(data[feature])  # number of rows; denominator for percentages
    count = data[feature].nunique()  # number of distinct levels

    # One unit of width per bar actually drawn (never more than the number
    # of levels, even when n is larger), plus a small margin.
    bars_shown = count if n is None else min(n, count)
    plt.figure(figsize=(bars_shown + 2, 6))

    plt.xticks(rotation=90, fontsize=15)
    ax = sns.countplot(
        data=data,
        x=feature,
        palette="Paired",
        # most frequent levels first, truncated to the top n
        order=data[feature].value_counts().index[:n],
    )

    for p in ax.patches:
        height = p.get_height()
        if perc:
            # percentage of all rows that fall in this level
            label = "{:.1f}%".format(100 * height / total)
        else:
            # count of this level, rendered without a trailing ".0"
            label = "{:.0f}".format(height)

        x = p.get_x() + p.get_width() / 2  # horizontal centre of the bar

        ax.annotate(
            label,
            (x, height),
            ha="center",
            va="center",
            size=12,
            xytext=(0, 5),  # lift the label 5 points above the bar top
            textcoords="offset points",
        )

    plt.title(title)
    plt.show()  # show the plot
# Plot the 20 most frequent recipe titles, labelled with their share of all rows
labeled_barplot(df, "title", perc=True, n=20, title="Top 20 Recipes")
# Show every recipe whose title is exactly "Chicken Casserole"
df[df['title'] == "Chicken Casserole"]
| title | ingredients | directions | link | source | NER | site | No_of_Steps_to_make_recipe | No_of_ingredients_in_recipe | |
|---|---|---|---|---|---|---|---|---|---|
| 63 | Chicken Casserole | [1 can cream of mushroom soup, 1 can cream of ... | [Mix all ingredients together in baking dish.,... | www.cookbooks.com/Recipe-Details.aspx?id=665397 | Gathered | [cream of chicken soup, cream of mushroom soup... | www.cookbooks.com | 3 | 6 |
| 131 | Chicken Casserole | [1/2 c. raw rice, 1 can French onion soup, 1 c... | [Mix all together; pour into a 7 x 12-inch cas... | www.cookbooks.com/Recipe-Details.aspx?id=661800 | Gathered | [cream of chicken, onion soup, mushrooms, bite... | www.cookbooks.com | 3 | 5 |
| 356 | Chicken Casserole | [1 1/2 c. uncooked rice, 1 envelope onion soup... | [Spread rice in bottom of a 9 x 13-inch glass ... | www.cookbooks.com/Recipe-Details.aspx?id=419575 | Gathered | [onion soup, cream of mushroom soup, water, ri... | www.cookbooks.com | 4 | 5 |
| 428 | Chicken Casserole | [1 large pkg. spinach noodles, cooked in broth... | [Cook chicken. Saute celery, pepper and onion.... | www.cookbooks.com/Recipe-Details.aspx?id=7283 | Gathered | [velveeta cheese, chicken breasts, spinach noo... | www.cookbooks.com | 1 | 8 |
| 539 | Chicken Casserole | [4 c. cooked chicken, 1 c. chopped celery, 1 c... | [Mix first nine ingredients., Saute 5 minutes ... | www.cookbooks.com/Recipe-Details.aspx?id=115061 | Gathered | [cream of chicken soup, slivered almonds, lemo... | www.cookbooks.com | 4 | 12 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2155999 | Chicken Casserole | [1 lb Chicken Breast (cubed), 5 Carrots (slice... | [Preheat oven to 350F, Boil carrots, Cube chic... | cookpad.com/us/recipes/346285-chicken-casserole | Recipes1M | [cream of chicken soup, sour cream, corn, ranc... | cookpad.com | 8 | 12 |
| 2169171 | Chicken Casserole | [1 (6 ounce) packagestove top chicken flavor s... | [Preheat oven to 350 degrees F., Prepare stuff... | www.food.com/recipe/chicken-casserole-268828 | Recipes1M | [velveeta cheese, condensed cream, chicken fla... | www.food.com | 6 | 4 |
| 2199641 | Chicken Casserole | [2 cups cooked egg noodles, 2 (10 3/4 ounce) c... | [Preheat oven to 350., In a large bowl, toss t... | www.food.com/recipe/chicken-casserole-228128 | Recipes1M | [pimiento, cream of mushroom soup, sherry wine... | www.food.com | 5 | 10 |
| 2214015 | Chicken Casserole | [1 12 loaves artisan bread (example-sourdough ... | [At least 8 hours before you plan to serve thi... | www.food.com/recipe/chicken-casserole-522180 | Recipes1M | [bread, chicken breasts, unsalted butter, heav... | www.food.com | 25 | 17 |
| 2226534 | Chicken Casserole | [1 tablespoon butter, 1 tablespoon olive oil, ... | [Saute leek, onion and garlic in melted butter... | www.food.com/recipe/chicken-casserole-171504 | Recipes1M | [corn, ground cumin, chicken breasts, potato, ... | www.food.com | 10 | 18 |
4099 rows × 9 columns
# Percentage bar plots: both sources, the top 20 sites, and the top 20 step counts
labeled_barplot(df, "source", perc=True, n=2, title="source")
labeled_barplot(df, "site", perc=True, n=20, title="Top 20 site")
labeled_barplot(df, "No_of_Steps_to_make_recipe", perc=True, n=20, title="Top 20 No_of_Steps_to_make_recipe")
Most of the recipes require around 1 to 8 steps. There are some recipes which require a large number of steps.
# Percentage bar plot of the 20 most common ingredient counts
labeled_barplot(df, "No_of_ingredients_in_recipe", perc=True, n=20, title="Top 20 No_of_ingredients_in_recipe")
Most recipes contain between 4 and 11 ingredients. Some recipes contain a very large number of ingredients.
Here I have plotted the ingredients and their usage in recipes, which shows the importance of each ingredient. Egg, sugar and salt are the most important ingredients used in recipes.
# One row per entry of every recipe's ingredients list, then plot the top 20
ner = df["ingredients"].explode().reset_index()
labeled_barplot(ner, "ingredients", perc=True, n=20, title="Top 20 ingredients")
Use the explode() method and then call the unique() method.
# Flatten every ingredients list into one Series, then keep the distinct entries
exploded_ingredients = df["ingredients"].explode()
ingredients = exploded_ingredients.unique()
ingredients
array(['1 c. firmly packed brown sugar', '1/2 c. evaporated milk',
'1/2 tsp. vanilla', ..., '1/2 cup very cold seltzer water',
'1 cup neutral oil, like canola',
'1/2 cup freshly grated Pecorino Romano cheese, plus more for sprinkling'],
dtype=object)
# Report how many distinct ingredient strings were found
print(f"Total Unique Ingredients are {len(ingredients)}")
Total Unique Ingredients are 4682802
stopwords = ['dtype', 'array', 'of', 'the', 'in', 'with', 'or', 'whole', 'all', 'a', 'that', 'using', 'at',
'and','for','to','until','is', 'then', 'on',"'","U22'","U13'","U32'","U27'","U37'"]
def minimal_wordcloud(df, column, max_rows=100_000):
    """Render a word cloud image from the text stored in one column of ``df``.

    The text is assembled from the actual cell values.  Converting the column
    with ``str(df[column].values)`` is not suitable: NumPy abbreviates the
    string form of arrays with more than 1000 elements to the first and last
    three items plus "...", so only a handful of rows would reach WordCloud.

    df: DataFrame holding the column.
    column: name of the column; cells may be strings or lists/tuples of strings.
    max_rows: upper bound on the number of rows used.  Larger columns are
        down-sampled with a fixed seed so the image is reproducible; pass
        None to use every row.

    Returns the image produced by ``WordCloud.to_image()``.
    """
    values = df[column].dropna()
    if max_rows is not None and len(values) > max_rows:
        values = values.sample(n=max_rows, random_state=42)

    # List-valued cells are flattened so that the cloud sees their words
    # rather than the brackets and quotes of the list repr.
    text = " ".join(
        " ".join(map(str, value)) if isinstance(value, (list, tuple)) else str(value)
        for value in values
    )

    wordcloud = WordCloud(width=1000, height=500, stopwords=stopwords).generate(text)
    return wordcloud.to_image()
# Word clouds for the four text columns; each call returns the rendered image.
minimal_wordcloud(df, 'title')
minimal_wordcloud(df, 'ingredients')
minimal_wordcloud(df, 'directions')
minimal_wordcloud(df, 'NER')
# Long-format table with one row per (recipe title, NER ingredient) pair.
# Exploded rows keep the index label of the recipe they came from, which the
# co-occurrence code below relies on to group rows by recipe.
ingredient_uses = (
    df[["title", "NER"]]
    .explode("NER")
    .rename(columns={"NER": "ingredient"})
)
ingredient_uses
| title | ingredient | |
|---|---|---|
| 0 | No-Bake Nut Cookies | bite size shredded rice biscuits |
| 0 | No-Bake Nut Cookies | vanilla |
| 0 | No-Bake Nut Cookies | brown sugar |
| 0 | No-Bake Nut Cookies | nuts |
| 0 | No-Bake Nut Cookies | milk |
| ... | ... | ... |
| 2231141 | Polpette in Spicy Tomato Sauce | tomato sauce |
| 2231141 | Polpette in Spicy Tomato Sauce | garlic |
| 2231141 | Polpette in Spicy Tomato Sauce | sausage |
| 2231141 | Polpette in Spicy Tomato Sauce | bread crumbs |
| 2231141 | Polpette in Spicy Tomato Sauce | salt |
18420503 rows × 2 columns
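This section shows each of the 20 most frequent ingredients together with the ingredients that most often appear in the same recipes (the top 10 by co-occurrence count, minus the first entry, which is the ingredient itself — so 9 related ingredients each).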
# The 20 most frequent ingredients, most common first.
all_ingredients = ingredient_uses["ingredient"].value_counts().index[:20].tolist()
for name in all_ingredients:
    # Index labels (recipe ids) of every recipe that uses this ingredient.
    recipe_ids = ingredient_uses.loc[ingredient_uses["ingredient"] == name].index.unique()
    # Count every ingredient that occurs in those recipes.
    co_counts = ingredient_uses.loc[ingredient_uses.index.isin(recipe_ids), "ingredient"].value_counts()
    # Entries 1..9 of the ranking; entry 0 is skipped.
    print(name, "with", co_counts.index[1:10].tolist())
    print()
salt with ['flour', 'sugar', 'butter', 'eggs', 'onion', 'garlic', 'milk', 'water', 'pepper'] sugar with ['salt', 'flour', 'eggs', 'vanilla', 'butter', 'milk', 'water', 'baking powder', 'baking soda'] butter with ['salt', 'flour', 'sugar', 'eggs', 'milk', 'vanilla', 'brown sugar', 'onion', 'baking powder'] flour with ['salt', 'sugar', 'eggs', 'butter', 'vanilla', 'milk', 'baking powder', 'baking soda', 'egg'] eggs with ['salt', 'sugar', 'flour', 'butter', 'vanilla', 'milk', 'baking powder', 'baking soda', 'brown sugar'] onion with ['salt', 'garlic', 'pepper', 'water', 'butter', 'tomatoes', 'celery', 'olive oil', 'milk'] garlic with ['salt', 'onion', 'olive oil', 'tomatoes', 'water', 'butter', 'pepper', 'parsley', 'oregano'] milk with ['salt', 'sugar', 'flour', 'butter', 'eggs', 'vanilla', 'baking powder', 'onion', 'egg'] water with ['salt', 'sugar', 'flour', 'onion', 'butter', 'garlic', 'eggs', 'milk', 'pepper'] vanilla with ['sugar', 'flour', 'eggs', 'salt', 'butter', 'milk', 'baking powder', 'baking soda', 'brown sugar'] olive oil with ['salt', 'garlic', 'onion', 'tomatoes', 'water', 'pepper', 'parsley', 'butter', 'lemon juice'] pepper with ['salt', 'onion', 'garlic', 'butter', 'milk', 'water', 'olive oil', 'flour', 'tomatoes'] brown sugar with ['salt', 'flour', 'butter', 'vanilla', 'eggs', 'sugar', 'baking soda', 'cinnamon', 'baking powder'] egg with ['salt', 'flour', 'sugar', 'butter', 'milk', 'vanilla', 'baking powder', 'baking soda', 'brown sugar'] tomatoes with ['salt', 'garlic', 'onion', 'olive oil', 'water', 'pepper', 'onions', 'oregano', 'parsley'] baking powder with ['flour', 'salt', 'sugar', 'eggs', 'vanilla', 'butter', 'milk', 'baking soda', 'egg'] lemon juice with ['salt', 'sugar', 'garlic', 'butter', 'olive oil', 'water', 'flour', 'onion', 'eggs'] cinnamon with ['sugar', 'salt', 'flour', 'eggs', 'butter', 'vanilla', 'brown sugar', 'nutmeg', 'baking soda'] sour cream with ['salt', 'butter', 'onion', 'sugar', 'eggs', 'flour', 'cheddar cheese', 'cream 
cheese', 'garlic'] baking soda with ['flour', 'salt', 'sugar', 'eggs', 'vanilla', 'butter', 'baking powder', 'brown sugar', 'egg']
# Build an edge list (ingredient -> related ingredient) for the 20 most
# frequent ingredients, using the same co-occurrence ranking as above.
all_ingredients = ingredient_uses["ingredient"].value_counts().index[:20].tolist()
letters_only = re.compile(r'[a-zA-Z]+')
pairs = []
for name in all_ingredients:
    # Index labels (recipe ids) of every recipe that uses this ingredient.
    recipe_ids = ingredient_uses.loc[ingredient_uses["ingredient"] == name].index.unique()
    co_counts = ingredient_uses.loc[ingredient_uses.index.isin(recipe_ids), "ingredient"].value_counts()
    # Names are reduced to their alphabetic words joined by single spaces.
    clean_name = " ".join(letters_only.findall(name))
    pairs.extend(
        [clean_name, " ".join(letters_only.findall(related))]
        for related in co_counts.index[1:10]
    )
network_data = pd.DataFrame(data=pairs, columns=['Ingredient', 'Related Ingredients'])
network_data.head()
| Ingredient | Related Ingredients | |
|---|---|---|
| 0 | salt | flour |
| 1 | salt | sugar |
| 2 | salt | butter |
| 3 | salt | eggs |
| 4 | salt | onion |
# Keep each edge once, then discard rows that pair an ingredient with itself.
network_data = (
    network_data
    .drop_duplicates()
    .loc[lambda pairs: pairs["Ingredient"] != pairs["Related Ingredients"]]
)
def plot_network(dataframe, ingredient_name=None):
    """Draw the ingredient pairs in ``dataframe`` as a directed graph.

    dataframe: frame with 'Ingredient' and 'Related Ingredients' columns;
        every row becomes an edge Ingredient -> Related Ingredients.
    ingredient_name: when given, only rows whose 'Ingredient' equals it are
        drawn; when omitted, all rows are drawn and the title suffix is empty.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    if ingredient_name is None:
        ingredient_name = ""
    else:
        dataframe = dataframe[dataframe['Ingredient'] == ingredient_name]

    graph = nx.DiGraph()
    for source, target in zip(dataframe['Ingredient'], dataframe['Related Ingredients']):
        # The 'label' attribute stores the text drawn on each node.
        graph.add_node(source, label=source)
        graph.add_node(target, label=target)
        graph.add_edge(source, target)

    plt.figure(figsize=(20, 13))
    # Fixed seed so the layout is the same on every run.
    layout = nx.spring_layout(graph, seed=42)
    nx.draw_networkx(
        graph,
        layout,
        with_labels=True,
        node_size=5000,
        node_color='lightblue',
        edge_color='gray',
        font_size=12,
        labels=nx.get_node_attributes(graph, 'label'),
    )
    plt.title("Related Ingredients of " + ingredient_name)
    plt.axis('off')
    plt.show()
# Whole network first, then the neighbourhood of a single ingredient.
# plot_network() filters on exact equality with the 'Ingredient' column, whose
# values are the 20 most frequent ingredient names.  "soda" is not one of them
# (the list contains "baking soda"), so it selected no rows and drew an empty
# figure.
plot_network(network_data)
plot_network(network_data, "baking soda")
I have used only the first 10,000 records for the recommendation system.
# Work on the first 10,000 recipes only: the similarity matrix below is dense
# and n x n, so it grows quadratically with the number of rows.
df_small = df.reset_index()[:10000]

# min_df=1 keeps every term.  The integer 0 used previously filters nothing
# either, but newer scikit-learn parameter validation may reject it.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')

# Vectorise exactly the rows held in df_small so that row k of tfidf_matrix and
# cosine_sim describes df_small.iloc[k].  Slicing the Series with [1:-1] selects
# rows, not characters: it dropped the first recipe and shifted every
# similarity row by one relative to `titles` and `indices`.
tfidf_matrix = tf.fit_transform(df_small["NER"].astype(str))

# TfidfVectorizer L2-normalises rows by default, so the linear kernel (dot
# product) of the matrix with itself is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

titles = df_small['title']
# Title -> positional row in df_small; titles may repeat.
indices = pd.Series(df_small.index, index=df_small['title'])
df_small['title'].values
array(['No-Bake Nut Cookies', "Jewell Ball'S Chicken", 'Creamy Corn', ...,
'Chicken Casserole', 'Sweet Potatoes Casserole', '7 Layer Salad'],
dtype=object)
def get_recommendations(title, no_of_recipes):
    """Return the titles of the recipes most similar to ``title``.

    Reads the module-level ``indices`` (title -> row position),
    ``cosine_sim`` (pairwise similarity matrix) and ``titles``.

    title: recipe title to look up.  A KeyError is raised when it is not
        present in ``indices``.
    no_of_recipes: maximum number of recommendations to return (no longer
        silently capped at 30).

    Returns a Series of titles ordered from most to least similar.  The
    query recipe itself is never part of the result.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Titles are not unique (e.g. several "Chicken Casserole" rows), in
        # which case the lookup yields a Series; use the first occurrence.
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx]).ravel()
    # Stable sort on the negated scores: descending similarity, ties kept in
    # row order.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the query by index.  Skipping "the first hit" is not reliable,
    # because another recipe with an identical ingredient list also scores 1.0
    # and may be ranked ahead of the query.
    ranked = ranked[ranked != idx][:no_of_recipes]
    return titles.iloc[ranked]
# Ask for up to 10 recipes similar to "Sweet Potatoes Casserole" and display them.
recommended_recipes = get_recommendations("Sweet Potatoes Casserole", 10)
recommended_recipes
5359 The "Bestest" Banana Bread 7703 Custard Sauce 623 Baked Pork Tenderloin 3164 Broccoli And Cheese Casserole 3842 Strawberry Pizza 1952 Pumpkin Bread 5075 Crustless Egg Custard 2005 Cathedral Window Candy 2385 Quick Dessert 5969 Chicken Lasagna Name: title, dtype: object
# Ask for up to 5 recipes similar to "Creamy Corn" and display them.
recommended_recipes = get_recommendations("Creamy Corn", 5)
recommended_recipes
8100 Peanut Butter Pie 3695 Baked Halibut, Italian Style 9641 Chinese Chicken Wings 446 Sour Cream Pound Cake 9470 Broccoli Casserole Name: title, dtype: object
Suppose I have the following list of ingredients in my kitchen.
Which recipe can I make from the available ingredients? This system helps by suggesting a recipe and listing what is still missing.
# Recipe data
# First 1000 rows of df; find_best_recipe() reads this module-level name when
# it is called.
recipe_data = df.head(1000)
def find_best_recipe(ingredients, recipes=None):
    """Return the recipe that uses the most of the given ingredients.

    ingredients: iterable of ingredient strings that are available.  It is
        materialised once, so a generator works as well as a list.
    recipes: DataFrame with an 'ingredients' column to search.  Defaults to
        the module-level ``recipe_data``, looked up at call time; pass a
        frame explicitly to avoid depending on whatever that global
        currently holds.

    Returns the best-matching row as a Series, or None when no recipe shares
    any ingredient.  On ties the earliest row wins.
    """
    if recipes is None:
        recipes = recipe_data
    available = list(ingredients)

    best_position = None
    best_match_count = 0
    # Only the 'ingredients' column is needed for scoring, so iterate over it
    # directly instead of building a Series per row with iterrows().
    for position, recipe_ingredients in enumerate(recipes['ingredients']):
        match_count = sum(1 for item in available if item in recipe_ingredients)
        if match_count > best_match_count:
            best_position = position
            best_match_count = match_count

    if best_position is None:
        return None
    return recipes.iloc[best_position]
def print_recipe_directions(recipe, ingredients):
    """Print what is on hand, what is still missing, and how to cook ``recipe``.

    recipe: mapping-like row with 'ingredients' and 'directions' entries,
        each a sequence of strings.
    ingredients: sequence of ingredient strings that are already available.

    Output goes to stdout; nothing is returned.
    """
    on_hand = set(ingredients)
    # dict.fromkeys() removes duplicates while keeping the recipe's own order,
    # so the missing-ingredient list prints in a stable order (a plain set
    # difference prints in arbitrary order).
    missing = [item for item in dict.fromkeys(recipe['ingredients']) if item not in on_hand]

    print("You have ingredients:\n")
    print("\n".join(ingredients))
    print("\n")
    print("If you arrange following ingredient(s) then you can make Recipe:\n")
    print("\n".join(missing))
    print("\n")
    print("Recipe Directions:\n")
    print("\n".join(recipe['directions']))
# Ingredients assumed to be on hand for this example.
input_ingredients = [
    '1 c. firmly packed brown sugar',
    '1/2 c. evaporated milk',
    '1/2 tsp. vanilla',
]
# Look up the recipe sharing the most ingredients, then either report that
# nothing matched or print what is missing together with the directions.
best_recipe = find_best_recipe(input_ingredients)
if best_recipe is None:
    print("No matching recipe found.")
else:
    print_recipe_directions(best_recipe, input_ingredients)
You have ingredients: 1 c. firmly packed brown sugar 1/2 c. evaporated milk 1/2 tsp. vanilla If you arrange following ingredient(s) then you can make Recipe: 1/2 c. broken nuts (pecans) 3 1/2 c. bite size shredded rice biscuits 2 Tbsp. butter or margarine Recipe Directions: In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine. Stir over medium heat until mixture bubbles all over top. Boil and stir 5 minutes more. Take off heat. Stir in vanilla and cereal; mix well. Using 2 teaspoons, drop and shape into 30 clusters on wax paper. Let stand until firm, about 30 minutes.
# Second example: a different set of ingredients on hand.
input_ingredients = [
    '1 box powdered sugar',
    '8 oz. soft butter',
    '1 (8 oz.) peanut butter',
    'paraffin',
]
# Look up the recipe sharing the most ingredients, then either report that
# nothing matched or print what is missing together with the directions.
best_recipe = find_best_recipe(input_ingredients)
if best_recipe is None:
    print("No matching recipe found.")
else:
    print_recipe_directions(best_recipe, input_ingredients)
You have ingredients: 1 box powdered sugar 8 oz. soft butter 1 (8 oz.) peanut butter paraffin If you arrange following ingredients then you can make Recipe: 12 oz. chocolate chips Recipe Directions: Mix sugar, butter and peanut butter. Roll into balls and place on cookie sheet. Set in freezer for at least 30 minutes. Melt chocolate chips and paraffin in double boiler. Using a toothpick, dip balls 3/4 of way into chocolate chip and paraffin mixture to make them look like buckeyes.
One possible approach for generative AI is to use a text generation model called a Recurrent Neural Network (RNN) with the Long Short-Term Memory (LSTM) architecture. The LSTM model can be trained on the recipe directions from the dataset and then used to generate new recipe directions.
We use the recipe directions as the dataset for the LSTM.
Tokenization is applied to the dataset.
We train a deep neural network that generates recipe directions from a given starting text.
# Training corpus: the `directions` entries of the first 1000 recipes.
directions = df['directions'].head(1000).values.tolist()

# Fit the vocabulary.
# NOTE(review): Keras' Tokenizer treats a list entry as an already-split token
# sequence.  If each `directions` item is a list of step strings (as it appears
# to be), every vocabulary entry is a whole step rather than a word — confirm
# that this is intended.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(directions)
total_words = len(tokenizer.word_index) + 1  # word_index starts at 1; 0 is the padding id

# Every prefix of length >= 2 of each tokenised recipe becomes one training
# sample: all tokens but the last are the input, the last one is the target.
input_sequences = [
    token_ids[:end]
    for token_ids in tokenizer.texts_to_sequences(directions)
    for end in range(2, len(token_ids) + 1)
]

# Left-pad all samples to the length of the longest one.
max_sequence_len = max(map(len, input_sequences))
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

# Split into inputs and one-hot encoded next-token targets.
predictors = input_sequences[:, :-1]
label = tensorflow.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

# Embedding -> LSTM -> softmax over the vocabulary.
model = Sequential([
    Embedding(total_words, 100, input_length=max_sequence_len-1),
    LSTM(150),
    Dense(total_words, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train the model
model.fit(predictors, label, epochs=100, verbose=2)
Epoch 1/100 128/128 - 10s - loss: 8.4358 - 10s/epoch - 82ms/step Epoch 2/100 128/128 - 7s - loss: 8.4406 - 7s/epoch - 52ms/step Epoch 3/100 128/128 - 7s - loss: 8.3241 - 7s/epoch - 52ms/step Epoch 4/100 128/128 - 7s - loss: 8.1545 - 7s/epoch - 51ms/step Epoch 5/100 128/128 - 7s - loss: 7.8082 - 7s/epoch - 52ms/step Epoch 6/100 128/128 - 7s - loss: 7.3336 - 7s/epoch - 52ms/step Epoch 7/100 128/128 - 7s - loss: 6.7790 - 7s/epoch - 51ms/step Epoch 8/100 128/128 - 7s - loss: 6.1823 - 7s/epoch - 52ms/step Epoch 9/100 128/128 - 7s - loss: 5.5900 - 7s/epoch - 52ms/step Epoch 10/100 128/128 - 7s - loss: 5.0163 - 7s/epoch - 51ms/step Epoch 11/100 128/128 - 7s - loss: 4.4567 - 7s/epoch - 52ms/step Epoch 12/100 128/128 - 7s - loss: 3.9323 - 7s/epoch - 52ms/step Epoch 13/100 128/128 - 7s - loss: 3.4422 - 7s/epoch - 51ms/step Epoch 14/100 128/128 - 7s - loss: 2.9918 - 7s/epoch - 52ms/step Epoch 15/100 128/128 - 7s - loss: 2.5750 - 7s/epoch - 52ms/step Epoch 16/100 128/128 - 7s - loss: 2.1964 - 7s/epoch - 52ms/step Epoch 17/100 128/128 - 7s - loss: 1.8682 - 7s/epoch - 52ms/step Epoch 18/100 128/128 - 7s - loss: 1.5784 - 7s/epoch - 52ms/step Epoch 19/100 128/128 - 7s - loss: 1.3308 - 7s/epoch - 51ms/step Epoch 20/100 128/128 - 7s - loss: 1.1199 - 7s/epoch - 52ms/step Epoch 21/100 128/128 - 7s - loss: 0.9449 - 7s/epoch - 51ms/step Epoch 22/100 128/128 - 7s - loss: 0.7964 - 7s/epoch - 52ms/step Epoch 23/100 128/128 - 7s - loss: 0.6743 - 7s/epoch - 51ms/step Epoch 24/100 128/128 - 7s - loss: 0.5716 - 7s/epoch - 51ms/step Epoch 25/100 128/128 - 7s - loss: 0.4872 - 7s/epoch - 52ms/step Epoch 26/100 128/128 - 7s - loss: 0.4164 - 7s/epoch - 52ms/step Epoch 27/100 128/128 - 7s - loss: 0.3584 - 7s/epoch - 53ms/step Epoch 28/100 128/128 - 7s - loss: 0.3133 - 7s/epoch - 51ms/step Epoch 29/100 128/128 - 7s - loss: 0.2754 - 7s/epoch - 52ms/step Epoch 30/100 128/128 - 7s - loss: 0.2451 - 7s/epoch - 51ms/step Epoch 31/100 128/128 - 7s - loss: 0.2176 - 7s/epoch - 52ms/step Epoch 32/100 128/128 - 
7s - loss: 0.1961 - 7s/epoch - 52ms/step Epoch 33/100 128/128 - 7s - loss: 0.1791 - 7s/epoch - 52ms/step Epoch 34/100 128/128 - 7s - loss: 0.1647 - 7s/epoch - 52ms/step Epoch 35/100 128/128 - 7s - loss: 0.1518 - 7s/epoch - 52ms/step Epoch 36/100 128/128 - 7s - loss: 0.1405 - 7s/epoch - 52ms/step Epoch 37/100 128/128 - 7s - loss: 0.1311 - 7s/epoch - 51ms/step Epoch 38/100 128/128 - 7s - loss: 0.1236 - 7s/epoch - 52ms/step Epoch 39/100 128/128 - 7s - loss: 0.1162 - 7s/epoch - 52ms/step Epoch 40/100 128/128 - 7s - loss: 0.1099 - 7s/epoch - 52ms/step Epoch 41/100 128/128 - 7s - loss: 0.1048 - 7s/epoch - 52ms/step Epoch 42/100 128/128 - 7s - loss: 0.0988 - 7s/epoch - 52ms/step Epoch 43/100 128/128 - 7s - loss: 0.0960 - 7s/epoch - 52ms/step Epoch 44/100 128/128 - 7s - loss: 0.0917 - 7s/epoch - 52ms/step Epoch 45/100 128/128 - 7s - loss: 0.0877 - 7s/epoch - 52ms/step Epoch 46/100 128/128 - 7s - loss: 0.0846 - 7s/epoch - 52ms/step Epoch 47/100 128/128 - 7s - loss: 0.0815 - 7s/epoch - 52ms/step Epoch 48/100 128/128 - 7s - loss: 0.0786 - 7s/epoch - 51ms/step Epoch 49/100 128/128 - 7s - loss: 0.0765 - 7s/epoch - 52ms/step Epoch 50/100 128/128 - 7s - loss: 0.0754 - 7s/epoch - 52ms/step Epoch 51/100 128/128 - 7s - loss: 0.0728 - 7s/epoch - 51ms/step Epoch 52/100 128/128 - 7s - loss: 0.0710 - 7s/epoch - 52ms/step Epoch 53/100 128/128 - 7s - loss: 0.0691 - 7s/epoch - 52ms/step Epoch 54/100 128/128 - 7s - loss: 0.0687 - 7s/epoch - 51ms/step Epoch 55/100 128/128 - 7s - loss: 0.0669 - 7s/epoch - 52ms/step Epoch 56/100 128/128 - 7s - loss: 0.0649 - 7s/epoch - 52ms/step Epoch 57/100 128/128 - 7s - loss: 0.0634 - 7s/epoch - 52ms/step Epoch 58/100 128/128 - 7s - loss: 0.0625 - 7s/epoch - 52ms/step Epoch 59/100 128/128 - 7s - loss: 0.0618 - 7s/epoch - 52ms/step Epoch 60/100 128/128 - 7s - loss: 0.0615 - 7s/epoch - 51ms/step Epoch 61/100 128/128 - 7s - loss: 0.0605 - 7s/epoch - 52ms/step Epoch 62/100 128/128 - 7s - loss: 0.0587 - 7s/epoch - 52ms/step Epoch 63/100 128/128 - 7s - loss: 
0.0582 - 7s/epoch - 52ms/step Epoch 64/100 128/128 - 7s - loss: 0.0607 - 7s/epoch - 52ms/step Epoch 65/100 128/128 - 7s - loss: 0.0587 - 7s/epoch - 52ms/step Epoch 66/100 128/128 - 7s - loss: 0.0574 - 7s/epoch - 52ms/step Epoch 67/100 128/128 - 7s - loss: 0.0582 - 7s/epoch - 52ms/step Epoch 68/100 128/128 - 7s - loss: 0.0599 - 7s/epoch - 52ms/step Epoch 69/100 128/128 - 7s - loss: 0.0584 - 7s/epoch - 52ms/step Epoch 70/100 128/128 - 7s - loss: 0.0563 - 7s/epoch - 52ms/step Epoch 71/100 128/128 - 7s - loss: 0.0552 - 7s/epoch - 52ms/step Epoch 72/100 128/128 - 7s - loss: 0.0537 - 7s/epoch - 52ms/step Epoch 73/100 128/128 - 7s - loss: 0.0538 - 7s/epoch - 51ms/step Epoch 74/100 128/128 - 7s - loss: 0.0530 - 7s/epoch - 52ms/step Epoch 75/100 128/128 - 7s - loss: 0.0534 - 7s/epoch - 52ms/step Epoch 76/100 128/128 - 7s - loss: 0.0526 - 7s/epoch - 52ms/step Epoch 77/100 128/128 - 7s - loss: 0.0521 - 7s/epoch - 52ms/step Epoch 78/100 128/128 - 7s - loss: 0.0524 - 7s/epoch - 52ms/step Epoch 79/100 128/128 - 7s - loss: 0.0521 - 7s/epoch - 53ms/step Epoch 80/100 128/128 - 7s - loss: 0.0512 - 7s/epoch - 55ms/step Epoch 81/100 128/128 - 7s - loss: 0.0510 - 7s/epoch - 52ms/step Epoch 82/100 128/128 - 7s - loss: 0.0510 - 7s/epoch - 52ms/step Epoch 83/100 128/128 - 7s - loss: 0.0506 - 7s/epoch - 52ms/step Epoch 84/100 128/128 - 7s - loss: 0.0499 - 7s/epoch - 52ms/step Epoch 85/100 128/128 - 7s - loss: 0.0500 - 7s/epoch - 52ms/step Epoch 86/100 128/128 - 7s - loss: 0.0506 - 7s/epoch - 52ms/step Epoch 87/100 128/128 - 7s - loss: 0.0495 - 7s/epoch - 52ms/step Epoch 88/100 128/128 - 7s - loss: 0.0507 - 7s/epoch - 52ms/step Epoch 89/100 128/128 - 7s - loss: 0.0531 - 7s/epoch - 52ms/step Epoch 90/100 128/128 - 7s - loss: 0.0613 - 7s/epoch - 52ms/step Epoch 91/100 128/128 - 7s - loss: 0.0633 - 7s/epoch - 52ms/step Epoch 92/100 128/128 - 7s - loss: 0.0573 - 7s/epoch - 52ms/step Epoch 93/100 128/128 - 7s - loss: 0.0513 - 7s/epoch - 52ms/step Epoch 94/100 128/128 - 7s - loss: 0.0493 - 
7s/epoch - 53ms/step Epoch 95/100 128/128 - 7s - loss: 0.0488 - 7s/epoch - 52ms/step Epoch 96/100 128/128 - 7s - loss: 0.0490 - 7s/epoch - 52ms/step Epoch 97/100 128/128 - 7s - loss: 0.0480 - 7s/epoch - 52ms/step Epoch 98/100 128/128 - 7s - loss: 0.0482 - 7s/epoch - 52ms/step Epoch 99/100 128/128 - 7s - loss: 0.0482 - 7s/epoch - 52ms/step Epoch 100/100 128/128 - 7s - loss: 0.0482 - 7s/epoch - 52ms/step
<keras.src.callbacks.History at 0x238d7322d10>
In a large bowl, mixIn a large bowl, mix peel and slice. set aside. put in casserole dish. brush with remaining barbecue sauce.# Generate new recipe directions
seed_text = "In a large bowl, mix"
next_words = 3
# Tokenise the seed once and extend the id sequence with each prediction.
# Re-tokenising the growing text on every round can drop generated tokens
# whenever texts_to_sequences() does not map the appended text back to the
# same vocabulary ids, leaving the model with no context.
token_ids = tokenizer.texts_to_sequences([seed_text])[0]
for _ in range(next_words):
    # The model was built with input_length=max_sequence_len-1, so pad to that
    # length rather than to max_sequence_len.
    token_list = pad_sequences([token_ids], maxlen=max_sequence_len - 1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    predicted_word_index = int(np.argmax(predicted, axis=-1)[0])
    # index_word is the tokenizer's id -> token mapping; id 0 (padding) has no
    # entry, which yields an empty string as before.
    output_word = tokenizer.index_word.get(predicted_word_index, "")
    if output_word:
        token_ids.append(predicted_word_index)
    seed_text += " " + output_word
# Print the generated recipe directions
print("Generated Directions:", seed_text)
Generated Directions: In a large bowl, mix peel and slice. set aside. put in casserole dish. brush with remaining barbecue sauce. conventional oven:
Get 2 tsp sugar andGet 2 tsp sugar and add water, frozen vegetables, potatoes and celery. turn cake out onto a heavy piece of aluminum foil which has been placed on a cookie sheet. bake in moderate oven until brown.# Generate new recipe directions
seed_text = "Get 2 tsp sugar and"
next_words = 3
# Tokenise the seed once and extend the id sequence with each prediction.
# Re-tokenising the growing text on every round can drop generated tokens
# whenever texts_to_sequences() does not map the appended text back to the
# same vocabulary ids, leaving the model with no context.
token_ids = tokenizer.texts_to_sequences([seed_text])[0]
for _ in range(next_words):
    # The model was built with input_length=max_sequence_len-1, so pad to that
    # length rather than to max_sequence_len.
    token_list = pad_sequences([token_ids], maxlen=max_sequence_len - 1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    predicted_word_index = int(np.argmax(predicted, axis=-1)[0])
    # index_word is the tokenizer's id -> token mapping; id 0 (padding) has no
    # entry, which yields an empty string as before.
    output_word = tokenizer.index_word.get(predicted_word_index, "")
    if output_word:
        token_ids.append(predicted_word_index)
    seed_text += " " + output_word
# Print the generated recipe directions
print("Generated Directions:", seed_text)
Generated Directions: Get 2 tsp sugar and add water, frozen vegetables, potatoes and celery. turn cake out onto a heavy piece of aluminum foil which has been placed on a cookie sheet. bake in moderate oven until brown.
One possible use case is in the domain of recipe recommendation systems. By clustering recipes based on their ingredient similarity, we can identify groups of recipes that share common ingredients. This allows us to recommend alternative recipes to users based on their preferred ingredients or dietary restrictions. For example, if a user searches for a recipe containing chicken, the system can suggest other recipes from the same cluster that also feature chicken as a main ingredient.
Another use case is in the analysis of recipe databases or food websites. Clustering recipes based on ingredients helps to organize and categorize a large collection of recipes. This can assist in structuring recipe repositories, making it easier for users to navigate and search for specific types of recipes. It can also help identify popular ingredient combinations and culinary trends.
Additionally, the clustering results can provide insights into the relationships between different types of dishes or cuisines. By examining the clusters and their corresponding recipes, we can observe patterns and associations between ingredients used in specific culinary traditions or cultural contexts.
# Load the recipe dataset
# .copy() makes recipe_data an independent frame, so adding the 'Cluster'
# column further down does not raise SettingWithCopyWarning.
# NOTE: this rebinds the module-level recipe_data that find_best_recipe()
# reads by default.
recipe_data = df.head(45).copy()

# Preprocess NER column to extract ingredients
# Text vectorization: each NER entry is stringified and turned into TF-IDF
# weights.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(recipe_data['NER'].astype(str))

# Clustering
n_clusters = 3
# n_init is set explicitly to the value that was in effect (10) because its
# default is changing; random_state makes the cluster assignment repeatable.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
kmeans.fit(X)

# Dimensionality reduction to two components, for plotting only; the clusters
# themselves were fitted on the full TF-IDF matrix.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X.toarray())

# Plot recipe clusters
colors = ['red', 'green', 'blue']  # Customize cluster colors
markers = ['o', 's', 'D']  # Customize cluster markers
plt.figure(figsize=(8, 6))
for i in range(n_clusters):
    cluster_points = X_pca[kmeans.labels_ == i]
    plt.scatter(cluster_points[:, 0], cluster_points[:, 1], c=colors[i], marker=markers[i], label=f'Cluster {i+1}')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Recipe Clusters')
plt.legend()
plt.show()

# Display recipe clusters
recipe_data['Cluster'] = kmeans.labels_
for cluster_id in range(n_clusters):
    cluster_recipes = recipe_data[recipe_data['Cluster'] == cluster_id]
    print(f"\nCluster {cluster_id+1} Recipes:")
    for recipe_title in cluster_recipes['title']:
        print(recipe_title)
F:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning warnings.warn(
Cluster 1 Recipes: Nolan'S Pepper Steak Double Cherry Delight Quick Barbecue Wings Pink Stuff(Frozen Dessert) Fresh Strawberry Pie Cuddy Farms Marinated Turkey Spaghetti Sauce To Can Prize-Winning Meat Loaf Corral Barbecued Beef Steak Strips One Hour Rolls Punch Bowl Fruit Salad Summer Chicken Tuna Macaroni Casserole Artichoke Dip Summer Spaghetti Watermelon Rind Pickles Angel Biscuits Quick Peppermint Puffs Cluster 2 Recipes: No-Bake Nut Cookies Reeses Cups(Candy) Rhubarb Coffee Cake Millionaire Pie Buckeye Candy Easy German Chocolate Cake Broccoli Salad Eggless Milkless Applesauce Cake Grandma Hanrath'S Banana Breadfort Collins, Colorado Easy Fudge Cherry Pizza Chicken Stew Quick Coffee Cake(6 Servings) Fruit Pizza Cluster 3 Recipes: Jewell Ball'S Chicken Creamy Corn Chicken Funny Cheeseburger Potato Soup Scalloped Corn Taco Salad Chip Dip Strawberry Whatever Chocolate Frango Mints Smothered Round Steak(Servings: 4) Taco-Filled Green Pepper Potato And Cheese Pie Broccoli Dip For Crackers Pear-Lime Salad
C:\Users\Admin\AppData\Local\Temp\ipykernel_11224\1220178402.py:41: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy recipe_data['Cluster'] = kmeans.labels_